InstanceNorm

对输入张量按实例(Instance)+ 通道(Channel)维度执行归一化操作。该算子在每个样本的每个通道内,基于 inner_size 维度计算均值与方差,并结合可学习参数 gamma、beta 完成缩放与偏移。

\[ \begin{aligned} \mu_{b,c} &= \frac{1}{N} \sum_{i=1}^{N} x_{b,c,i} \\ \sigma^2_{b,c} &= \frac{1}{N} \sum_{i=1}^{N} x_{b,c,i}^2 - \mu_{b,c}^2 \\ y_{b,c,i} &= \left( \frac{x_{b,c,i} - \mu_{b,c}}{\sqrt{\sigma^2_{b,c} + \epsilon}} \right) \cdot \gamma_c + \beta_c \end{aligned} \]

其中:

  • \(b\) 表示 batch 维度

  • \(c\) 表示通道维度

  • \(i\) 表示 inner_size 维度上的索引,\(N\) 为 inner_size(每个通道内的归一化长度)

  • \(\gamma_c\)、\(\beta_c\) 为通道级缩放与偏移参数

输入:
  • input - 输入数据地址,形状为 [batch, channel, inner_size]

  • params - 参数打包成 long long 数组,各元素按以下顺序依次存放(下标从 0 开始):
    • gamma - 缩放参数地址,长度为 channel

    • beta - 偏移参数地址,长度为 channel

    • batch - batch 数。

    • channel - 通道数。

    • inner_size - 每个通道内的归一化长度。

  • epsilon - 数值稳定因子。

  • core_mask - 核掩码(仅适用于共享存储版本)。

输出:
  • output - InstanceNorm 计算结果地址。

支持平台:

FT78NE、MT7004

备注

  • FT78NE 支持 fp32 类型

  • MT7004 支持 fp16、fp32 类型

  • 归一化统计量仅在单个样本、单个通道内计算

共享存储版本:

void fp_instance_norm_s(float *input, float *output, long long *params, int core_mask, float epsilon)
void hp_instance_norm_s(half *input, half *output, long long *params, int core_mask, float epsilon)

C调用示例:

 1// FT78NE 示例
 2#include <stdio.h>
 3#include <instancenorm.h>
 4
 5int main(int argc, char* argv[]) {
 6    float *input = (float*)0x81000000;
 7    float *gamma = (float*)0x82000000;
 8    float *beta = (float*)0x83000000;
 9    float *output = (float*)0x84000000;
10
11    int batch;
12    int channel;
13    int inner_size;
14
15    batch = 4;
16    channel = 16;
17    inner_size = 8;
18
19    float epsilon = 0.001;
20
21    long long param[10];
22    param[0] = (long long)gamma;
23    param[1] = (long long)beta;
24    param[2] = (long long)batch;
25    param[3] = (long long)channel;
26    param[4] = (long long)inner_size;
27
28    int i, j, k;
29    for (i = 0; i < batch; i++){
30        gamma[i] = 0;
31        beta[i] = 0;
32        for(j = 0; j < channel; j++){
33            for(k = 0; k < inner_size; k++){
34                input[i * channel * inner_size + j * inner_size + k] = (float)rand() / (RAND_MAX + 1.0);
35            }
36        }
37    }
38    for(i = 0; i < channel; ++i){
39        gamma[i] = ((float)rand() / RAND_MAX) * 2 - 1;
40        beta[i] = ((float)rand() / RAND_MAX) * 2 + 0.1;
41    }
42
43    int core_mask = 0b1111;
44    fp_instance_norm_s(input, output, param, core_mask, epsilon);
45    return 0;
46}

私有存储版本:

void fp_instance_norm_p(float *input, float *output, long long *params, float epsilon)
void hp_instance_norm_p(half *input, half *output, long long *params, float epsilon)

C调用示例:

 1// FT78NE 示例
 2#include <stdio.h>
 3#include <instancenorm.h>
 4
 5int main(int argc, char* argv[]) {
 6    float *input = (float*)0x10010000;
 7    float *gamma = (float*)0x10020000;
 8    float *beta = (float*)0x10030000;
 9    float *output = (float*)0x10040000;
10
11    int batch;
12    int channel;
13    int inner_size;
14
15    batch = 4;
16    channel = 16;
17    inner_size = 8;
18
19    float epsilon = 0.001;
20
21    long long param[10];
22    param[0] = (long long)gamma;
23    param[1] = (long long)beta;
24    param[2] = (long long)batch;
25    param[3] = (long long)channel;
26    param[4] = (long long)inner_size;
27
28    int i, j, k;
29    for (i = 0; i < batch; i++){
30        gamma[i] = 0;
31        beta[i] = 0;
32        for(j = 0; j < channel; j++){
33            for(k = 0; k < inner_size; k++){
34                input[i * channel * inner_size + j * inner_size + k] = (float)rand() / (RAND_MAX + 1.0);
35            }
36        }
37    }
38    for(i = 0; i < channel; ++i){
39        gamma[i] = ((float)rand() / RAND_MAX) * 2 - 1;
40        beta[i] = ((float)rand() / RAND_MAX) * 2 + 0.1;
41    }
42
43    fp_instance_norm_p(input, output, param, epsilon);
44    return 0;
45}